The catch: we spent a lot of time on the maps and have not yet worked out which records to remove, which data is messy, etc.

Check: being zoned to a given elementary school does not mean that everyone zoned to that elementary school is also zoned to the same middle school.

We took one elementary zone (01M015) and showed that you cannot make that assumption: its students are split across more than one middle school zone.

# For the rows zoned to elementary school 01M015, count how many fall under
# each zoned middle school.
zoneData %>%
  filter(zoned_elm_dbn == "01M015") %>%
  group_by(zoned_mid_dbn) %>%
  summarise(count = n())
## # A tibble: 3 x 2
##   zoned_mid_dbn count
##           <chr> <int>
## 1        01M972   155
## 2        01M973   339
## 3          <NA>     3

The data is clean in this respect: each student appears exactly once per year.

# Count the rows for every (year, student) pair, then reduce the result to the
# distinct row counts that occur. A single value of 1 means no student is
# repeated within a year.
count_df <- zoneData %>%
  group_by(year, student_id_scram) %>%
  summarise(count = n()) %>%
  group_by(count) %>%
  summarise()

count_df
## # A tibble: 1 x 1
##   count
##   <int>
## 1     1

Look at the rows where the zoned elementary school is missing (NA).

# Rows that have no zoned elementary school recorded.
filter(zoneData, is.na(zoned_elm_dbn))
## # A tibble: 152,198 x 12
##    student_id_scram res_zip_cde zoned_elm_dbn zoned_mid_dbn zoned_hs_dbn
##               <int>       <chr>         <chr>         <chr>        <chr>
##  1        411702894       11233          <NA>          <NA>         <NA>
##  2        981802723       10027          <NA>          <NA>         <NA>
##  3        288472869       11233          <NA>          <NA>         <NA>
##  4        347572670       11233          <NA>          <NA>         <NA>
##  5        166902009       11235          <NA>          <NA>         <NA>
##  6        239802916       10009          <NA>          <NA>         <NA>
##  7         98612290       10002          <NA>          <NA>         <NA>
##  8         41602964       10002          <NA>          <NA>         <NA>
##  9        890612332       10002          <NA>          <NA>         <NA>
## 10        673802946       10027          <NA>          <NA>         <NA>
## # ... with 152,188 more rows, and 7 more variables: census_block <chr>,
## #   census_tract <chr>, year <dbl>, res_boro <chr>, res_district <int>,
## #   audit_dte <int>, last_change_dte <int>

Count the student records zoned to each elementary school (all years combined).

# Number of rows for each zoned elementary school.
zoneData %>%
  group_by(zoned_elm_dbn) %>%
  tally()
## # A tibble: 949 x 2
##    zoned_elm_dbn     n
##            <chr> <int>
##  1         00678     1
##  2         00767     1
##  3         00921     1
##  4         00961     1
##  5         00976     1
##  6        01M015   497
##  7        01M019   615
##  8        01M020   589
##  9        01M034   569
## 10        01M063   704
## # ... with 939 more rows

Use census tracts to group students by area

# Number of rows in zoneData for each census tract code.
# NOTE(review): census tract codes are normally unique only within a county;
# confirm whether census_tract needs to be combined with res_boro here.
tracts <- zoneData %>%
  group_by(census_tract) %>%
  summarise(numStudents = n())

head(tracts)
## # A tibble: 6 x 2
##   census_tract numStudents
##          <chr>       <int>
## 1       000100         332
## 2       000200        1493
## 3       000201         516
## 4       000202         891
## 5       000300         220
## 6       000301         131

Plot the number of students in each census tract (all years) – where they LIVE.

# Attach the per-tract student counts to the tract polygons.
tracts_map <- merge(
  nyc_tracts, tracts,
  by.x = "TRACTCE", by.y = "census_tract"
)

#plot(nyc_tracts)

# Tract polygons on their own; the popup shows the tract code.
leaflet(nyc_tracts) %>%
  addTiles() %>%
  addPolygons(popup = ~paste("Tract:", TRACTCE)) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 13)

# Same view using the merged object; the popup shows the student count.
leaflet(tracts_map) %>%
  addTiles() %>%
  addPolygons(popup = ~paste("Num students:", numStudents)) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 13)

Number of records for each ZIP code in our dataset.

# Number of rows recorded under each residential zip code.
summarise(group_by(zoneData, res_zip_cde), count = n())
## # A tibble: 10,583 x 2
##    res_zip_cde count
##          <chr> <int>
##  1           0   116
##  2       00000   129
##  3       00002     1
##  4       00018     1
##  5       00048     1
##  6       00051     1
##  7       00064     1
##  8       00072     1
##  9       00104     1
## 10       00110     1
## # ... with 10,573 more rows
#r <- GET("http://www2.census.gov/geo/tiger/TIGER2010DP1/County_2010Census_DP1.zip")
#r<- GET("https://data.cityofnewyork.us/download/hkaz-iizd/application%2Fzip")


#r<- GET("https://data.cityofnewyork.us/api/geospatial/mshx-yvwq?method=export&format=GeoJSON")
#dbns <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)

Get the elementary school geospatial data from NYC Open Data.

# Elementary data only: download the GeoJSON export from NYC Open Data and
# read it into a spatial object.
r <- GET("https://data.cityofnewyork.us/api/geospatial/cq6p-iwiy?method=export&format=GeoJSON")
# Fail here on an HTTP error instead of handing an error page to readOGR().
stop_for_status(r)
# Name the encoding explicitly so content() does not fall back to its default
# with a "No encoding supplied" message.
dbns <- readOGR(
  content(r, "text", encoding = "UTF-8"),
  "OGRGeoJSON",
  verbose = FALSE
)
## Warning in readOGR(content(r, "text"), "OGRGeoJSON", verbose = F): Dropping
## null geometries: 752
#summary(dbns)

#dbns@data$dbn

# Convert dbns with tidy() (presumably broom's tidier for spatial objects --
# confirm). Within this view zones_df is only referenced by the commented-out
# ggplot calls, which read its long, lat and group columns.
zones_df <- tidy(dbns)
## Regions defined for each Polygons
#ggplot() + 
 # geom_polygon(data=zones_df, aes(x=long, y=lat, group=group), alpha = .25, colour = "black", fill = NA)
# Terrain base map centred on lon -74.00, lat 40.71 at zoom level 11.
nyc_map <- get_map(
  location = c(lon = -74.00, lat = 40.71),
  zoom = 11,
  maptype = "terrain"
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=40.71,-74&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
#ggmap(nyc_map) + 
 # geom_polygon(data=zones_df, aes(x=long, y=lat, group=group), alpha = .25, colour = "black", fill = NA)

Plot this data (includes all years)

# Interactive map of the dbns polygons; clicking a polygon shows its DBN.
leaflet(dbns) %>%
  addTiles() %>%
  addPolygons(popup = ~paste("School DBN:", dbn)) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 13)
#zone_dbn <- zoneData %>% group_by(zoned_elm_dbn) %>% summarise(count = n())

Get the counts for a specific year (2015), which is the year of the shapefile.

# Rows per zoned elementary school for 2015.
# Rows with no zoned elementary school are dropped before counting: n() never
# returns NA, so filtering on the count afterwards would remove nothing and
# the NA group would be carried into the merge below.
zone_dbn <- zoneData %>%
  filter(year == 2015, !is.na(zoned_elm_dbn)) %>%
  group_by(zoned_elm_dbn) %>%
  summarise(count = n())

# Attach the 2015 counts to the school polygons, matching on DBN.
map_data <- merge(dbns, zone_dbn, by.x = "dbn", by.y = "zoned_elm_dbn")

# Colour scale spanning the observed counts; NA counts are ignored when the
# range is computed.
pal <- colorNumeric(
  palette = "RdBu",
  domain = range(map_data@data$count, na.rm = TRUE)
)

leaflet(map_data) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal(count),
    popup = ~paste("Num students in 2015:", as.character(count))
  ) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 13)

# Use ZIP codes to map the data (this is across ALL YEARS!)

# Rows per residential zip code, across all years.
# Zip codes are coerced to integer so that non-numeric values become NA (with
# a coercion warning) and can be filtered out.
# The former select() step is gone: summarise() keeps only the grouping
# column and the count, so it had no effect on the result.
zone_zip <- zoneData %>%
  mutate(res_zip_cde = as.integer(res_zip_cde)) %>%
  filter(!is.na(res_zip_cde)) %>%
  group_by(res_zip_cde) %>%
  summarise(numStudents = n())
## Warning in evalq(as.integer(res_zip_cde), <environment>): NAs introduced by
## coercion
# Download the NYC zip code tabulation area polygons (GeoJSON) and read them
# into a spatial object.
testZip <- GET("http://catalog.civicdashboards.com/dataset/11fd957a-8885-42ef-aa49-5c879ec93fac/resource/28377e88-8a50-428f-807c-40ba1f09159b/download/nyc-zip-code-tabulation-areas-polygons.geojson")
# Fail here on an HTTP error instead of handing an error page to readOGR().
stop_for_status(testZip)
# Name the encoding explicitly so content() does not fall back to its default
# with a "No encoding supplied" message.
zips <- readOGR(
  content(testZip, "text", encoding = "UTF-8"),
  "OGRGeoJSON",
  verbose = FALSE
)
#summary(zips)

#zips_df <- tidy(zips)

# Attach the per-zip student counts to the zip code polygons.
# NOTE(review): res_zip_cde is integer at this point while postalCode comes
# from the GeoJSON -- confirm the two keys match as intended.
map_data_zip <- merge(
  zips, zone_zip,
  by.x = "postalCode", by.y = "res_zip_cde"
)

# Interactive map; clicking a zip code polygon shows its student count.
leaflet(map_data_zip) %>%
  addTiles() %>%
  addPolygons(
    popup = ~paste("Number Students", as.character(numStudents))
  ) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 13)
# NOTE(review): this repeats the earlier nyc_map <- get_map(...) call in this
# file with the same arguments, and no live code in view uses nyc_map
# afterwards -- it looks redundant; confirm before removing.
nyc_map <- get_map(location = c(lon = -74.00, lat = 40.71), maptype = "terrain", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=40.71,-74&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false

Further work

#dbnBioData <- bios %>%
 # select(student_id_scram, dbn, grade_level, year)

# dbnBioData <-  rename(dbnBioData, BioDbn = dbn)
 
 #merged <- merge(dbnBioData, zoneData, by = "student_id_scram" )
# Save the per-tract counts as a tab-separated text file in the working
# directory.
# NOTE(review): row names are written by default, so the header line will have
# one field fewer than the data lines -- confirm that downstream readers
# expect this layout.
write.table(
  tracts,
  file = "tracts.txt",
  sep = "\t",
  col.names = TRUE
)